In [1]:
%autosave 10
In [5]:
import numpy as np
import biggus
np_array = np.empty((700, 200), dtype=np.int32)
arr = biggus.NumpyArrayAdapter(np_array)
print arr
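The adapter is lazy: it reports shape and dtype without reading the wrapped data (a quick check of my own, not in the transcript):
In [ ]:
# my check: shape and dtype come from the wrapper, no data is read
print arr.shape, arr.dtype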
In [6]:
# like np.concatenate, but lazy
bigger_arr = biggus.LinearMosaic([arr, arr], axis=0)
print bigger_arr
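A sanity check of my own: the mosaic reports the concatenated shape immediately, since two (700, 200) arrays joined on axis 0 give (1400, 200):
In [ ]:
# my check: two (700, 200) arrays joined on axis 0 -> (1400, 200)
print bigger_arr.shape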
In [7]:
# no memory copying
print biggus.LinearMosaic([arr, arr] * 20, axis=0)
In [8]:
# stacks the arrays along a new dimension
biggus.ArrayStack(np.array([arr, arr]))
Out[8]:
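Assuming ArrayStack adds a new leading dimension, stacking two (700, 200) arrays should give a (2, 700, 200) result; a quick check of my own:
In [ ]:
# my check: stacking two (700, 200) arrays -> (2, 700, 200), still lazy
stacked = biggus.ArrayStack(np.array([arr, arr]))
print stacked.shape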
In [ ]:
import h5py
hdf_dataset = h5py.File('data.hdf5', 'r')['arange']
# this is lazy; no data is loaded
arr_hdf = biggus.NumpyArrayAdapter(hdf_dataset)
print arr_hdf
You can mix (e.g. with LinearMosaic) HDF5-backed and regular in-memory arrays.
In [ ]:
bigger_arr = biggus.LinearMosaic([bigger_arr, arr_hdf], axis=0)
print bigger_arr
The ndarray() method realizes an array, bringing the data into memory.
In [11]:
type(bigger_arr.ndarray()), bigger_arr.ndarray().shape
Out[11]:
You can do basic processing on massive arrays in chunks.
In [12]:
# These operations are deferred: nothing is computed yet
mean = biggus.mean(bigger_arr, axis=0)
std = biggus.std(bigger_arr, axis=0)
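To emphasise the deferral, it is worth confirming (my addition, not in the transcript) that mean and std are still lazy biggus arrays at this point:
In [ ]:
# my check: still lazy biggus arrays, not numpy results
print type(mean), mean.shape
print type(std), std.shape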
In [13]:
print mean
In [14]:
# _now_ we realize it: mean.ndarray() computes the mean
# done in chunks; the data is never all in memory at once
print np.all(mean.ndarray() == bigger_arr.ndarray().mean(axis=0))
Really, though, since you are going chunk by chunk anyway, you want to perform many operations at the same time.
In [15]:
# this realizes both results in one pass: the array really is chunked
# into sub-arrays and the aggregations accumulated as it goes
mean_np, std_np = biggus.ndarrays([mean, std])
print type(mean_np)
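As a follow-up check of my own: both results come back as plain numpy arrays with axis 0 reduced away, and the mean should match the earlier single-result realization:
In [ ]:
# my check: ordinary numpy arrays, axis 0 reduced away
print mean_np.shape, std_np.shape
print np.all(mean_np == mean.ndarray())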
In [18]:
import h5py
with h5py.File('result.hdf5', mode='w') as f_out:
    df = f_out.create_dataset('my_result', mean.shape, mean.dtype)
    biggus.save([mean], [df])
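To confirm the save, the dataset can be read back with plain h5py (my own check, reusing the 'result.hdf5' file and 'my_result' dataset names from above):
In [ ]:
# my check: read the saved result back with plain h5py
with h5py.File('result.hdf5', mode='r') as f_in:
    result = f_in['my_result'][...]
print result.shape, result.dtype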
!!AI see video.